Mini-Project #04: Just the Fact(-Check)s, Ma’am!

Author

Hajara Muzammal

Introduction

On August 1st, 2025, President Donald Trump’s abrupt dismissal of Dr. Erika McEntarfer, Commissioner of the Bureau of Labor Statistics, sparked intense debate about the integrity of federal economic reporting. The President’s decision, announced via Truth Social, was justified by concerns over the magnitude and direction of recent revisions to the monthly Current Employment Statistics (CES) reports—the closely-watched “jobs numbers” that influence financial markets and shape economic policy discussions. While economists across the political spectrum criticized the firing as potentially politicizing critical economic data, defenders pointed to unusually large revisions as evidence of systemic issues at BLS. This analysis examines recent patterns in CES estimates and their subsequent revisions to evaluate whether the data supports claims of problematic reporting accuracy, recognizing that such an assessment requires careful consideration of both absolute revision magnitudes and their proportional context within a growing workforce

Data Acquisition

Task 1: Download CES Total Nonfarm Payroll. Let's gather all the data from the Bureau of Labor Statistics (BLS).

Show code
library(httr2)
library(rvest)
library(dplyr)
library(tidyr)
library(stringr)
library(lubridate)
library(purrr)
library(DT)

# Step 1: POST the query for the CES Total Nonfarm Payroll series
# (CES0000000001), 1979-2025, to the BLS output servlet. A browser-like
# user agent is required or BLS rejects the request.
response <- request("https://data.bls.gov/pdq/SurveyOutputServlet") |>
  req_method("POST") |>
  req_body_form(
    request_action = "get_data",
    reformat = "true",
    from_results_page = "true",
    from_year = "1979",
    to_year = "2025",
    initial_request = "false",
    data_tool = "surveymost",
    series_id = "CES0000000001",
    original_annualAveragesRequested = "false"
  ) |>
  req_user_agent("Mozilla/5.0") |>
  req_perform()

# Step 2: Parse the HTML response and collect every <table> element.
all_tables <- response |>
  resp_body_html() |>
  html_elements("table")

# Step 3: Keep the first table with more than 5 columns -- the wide
# year-by-month layout that carries the monthly series.
ces_table <- all_tables |>
  map(\(tbl) html_table(tbl, fill = TRUE)) |>
  keep(\(tbl) ncol(tbl) > 5) |>
  first()

# Step 4: Reshape wide (one column per month) to long (one row per
# month), build a proper Date, and strip thousands separators.
ces_df <- ces_table |>
  mutate(Year = as.integer(Year)) |>
  pivot_longer(cols = -Year, names_to = "month", values_to = "level") |>
  mutate(
    month = str_sub(month, 1, 3),
    date = ym(paste(Year, month)),
    level = as.numeric(str_remove_all(level, ","))
  ) |>
  drop_na(level, date) |>
  arrange(date) |>
  select(date, level)

# Step 5: Render an interactive table with a colored header.
datatable(
  ces_df,
  options = list(
    pageLength = 20,
    searching = TRUE,
    info = TRUE
  ),
  rownames = FALSE,
  class = "display cell-border stripe"
) |>
  htmlwidgets::prependContent(
    htmltools::tags$style(
      htmltools::HTML("
        table.dataTable thead th {
          background-color: #4a90e2 !important;
          color: white !important;
        }
      ")
    )
  )

Task 2: Download CES Revisions Tables

Show code
# --- Libraries ---
library(httr2)
library(rvest)
library(tidyverse)
library(lubridate)
library(knitr)
library(DT)

# --- Task 1: Create CES Levels (Fake Data) ---
# Synthetic monthly employment series: an 88,000 base, a linear trend
# rising to +72,000 by the final month, plus a repeating 12-month
# seasonal offset.
ces_level <- tibble(
  date = seq(ymd("1979-01-01"), ymd("2025-06-01"), by = "month")
) |>
  mutate(
    idx = row_number(),
    seasonal = rep(
      c(-200, 0, 200, 300, 400, 200, 0, -200, -300, -200, 0, 100),
      length.out = n()
    ),
    level = 88000 + (idx / max(idx)) * 72000 + seasonal
  ) |>
  select(date, level)

# --- Task 2: Scrape BLS CES Revisions Page ---
url <- "https://www.bls.gov/web/empsit/cesnaicsrev.htm"

# Read HTML with browser-like headers (BLS blocks bare clients);
# retry transient failures up to 3 times.
response <- request(url) |>
  req_headers(
    "User-Agent" = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language" = "en-US,en;q=0.5",
    "Accept-Encoding" = "gzip, deflate, br",
    "Connection" = "keep-alive",
    "Upgrade-Insecure-Requests" = "1"
  ) |>
  req_retry(max_tries = 3) |>
  req_perform()

html <- resp_body_html(response)

# Extract every <table> on the page.
tables <- html |> html_elements("table")

# Convert tables into data frames.
all_tables <- tables |> map(~ html_table(.x, fill = TRUE))

# Keep only tables wide enough to be CES revision tables.
rev_tables <- all_tables |> keep(\(x) ncol(x) >= 5)

# Build the CES revision dataset. Column layout assumed per table:
# [[1]] month name, [[2]] year, [[3]] initial estimate, [[5]] final.
# FIX: several scraped tables repeat the same month rows, which
# previously produced every month twice (visible as duplicated rows in
# the rendered output) and inflated the later join -- de-duplicate
# after stacking.
ces_revisions <- map_dfr(rev_tables, function(tbl) {

  month_rows <- tbl[[1]]
  keep <- str_sub(month_rows, 1, 3) %in% month.abb
  tbl <- tbl[keep, ]

  if (nrow(tbl) == 0) return(NULL)

  month <- match(str_sub(tbl[[1]], 1, 3), month.abb)

  tibble(
    date     = as.Date(paste(tbl[[2]], month, "01"), "%Y %m %d"),
    original = as.numeric(str_remove_all(tbl[[3]], "[^0-9.-]")),
    final    = as.numeric(str_remove_all(tbl[[5]], "[^0-9.-]")),
    revision = final - original
  )
}) |>
  distinct() |>
  arrange(date)

# --- Display CES Revisions Table (first 20 rows) ---
knitr::kable(
  head(ces_revisions, 20),
  caption = "First 20 rows of CES Revisions"
)
First 20 rows of CES Revisions
date original final revision
1979-01-01 325 243 -82
1979-01-01 325 243 -82
1979-02-01 301 294 -7
1979-02-01 301 294 -7
1979-03-01 324 445 121
1979-03-01 324 445 121
1979-04-01 72 -15 -87
1979-04-01 72 -15 -87
1979-05-01 171 291 120
1979-05-01 171 291 120
1979-06-01 97 225 128
1979-06-01 97 225 128
1979-07-01 44 87 43
1979-07-01 44 87 43
1979-08-01 2 49 47
1979-08-01 2 49 47
1979-09-01 135 41 -94
1979-09-01 135 41 -94
1979-10-01 306 179 -127
1979-10-01 306 179 -127
Show code
# --- Join the two datasets ---
# Left join keeps every month in the level series; months with no
# scraped revision get NA in the revision columns.
joined_data <- left_join(ces_level, ces_revisions, by = "date")

# Display joined data
knitr::kable(
  head(joined_data, 20),
  caption = "First 20 rows of joined data"
)
First 20 rows of joined data
date level original final revision
1979-01-01 87929.03 325 243 -82
1979-01-01 87929.03 325 243 -82
1979-02-01 88258.06 301 294 -7
1979-02-01 88258.06 301 294 -7
1979-03-01 88587.10 324 445 121
1979-03-01 88587.10 324 445 121
1979-04-01 88816.13 72 -15 -87
1979-04-01 88816.13 72 -15 -87
1979-05-01 89045.16 171 291 120
1979-05-01 89045.16 171 291 120
1979-06-01 88974.19 97 225 128
1979-06-01 88974.19 97 225 128
1979-07-01 88903.23 44 87 43
1979-07-01 88903.23 44 87 43
1979-08-01 88832.26 2 49 47
1979-08-01 88832.26 2 49 47
1979-09-01 88861.29 135 41 -94
1979-09-01 88861.29 135 41 -94
1979-10-01 89090.32 306 179 -127
1979-10-01 89090.32 306 179 -127

Data Integration and Exploration

Task 3: Data Exploration and Visualization

Now we will be tackling important statistics questions and visualizations.

  1. What and when were the largest revisions (positive and negative) in CES history?

Positive

Show code
library(dplyr)
library(DT)
library(htmltools)

# Largest Positive Revision: the month(s) whose revision equals the
# maximum observed value (ties are all kept).
largest_positive <- joined_data |>
  slice_max(order_by = revision, n = 1, with_ties = TRUE) |>
  select(date, revision)

largest_positive |>
  datatable(
    options = list(pageLength = 10, searching = FALSE, info = FALSE),
    rownames = FALSE,
    class = "display cell-border stripe"
  ) |>
  htmlwidgets::prependContent(
    tags$style(HTML("
      table.dataTable thead th {
        background-color: #4a90e2 !important;
        color: white !important;
      }
    "))
  )

Negative

Show code
# ---------------------------
# Largest Negative Revision
# ---------------------------
# The month(s) whose revision equals the minimum observed value
# (ties are all kept).
largest_negative <- joined_data |>
  slice_min(order_by = revision, n = 1, with_ties = TRUE) |>
  select(date, revision)

largest_negative |>
  datatable(
    options = list(pageLength = 10, searching = FALSE, info = FALSE),
    rownames = FALSE,
    class = "display cell-border stripe"
  ) |>
  htmlwidgets::prependContent(
    tags$style(HTML("
      table.dataTable thead th {
        background-color: #4a90e2 !important;
        color: white !important;
      }
    "))
  )
  2. What fraction of CES revisions are positive in each year? In each decade?
Show code
## ---- task3-fraction-positive, message=FALSE, warning=FALSE ----
library(dplyr)
library(lubridate)
library(DT)

# Derive year and decade once; later chunks reuse these columns.
joined_data <- joined_data |>
  mutate(
    year = year(date),
    decade = floor(year / 10) * 10
  )

# ===== 1) Fraction of positive revisions BY YEAR =====
# mean(revision > 0) is the share of months with an upward revision.
fraction_positive_by_year <- joined_data |>
  summarise(
    fraction_positive = round(mean(revision > 0, na.rm = TRUE), 2),
    .by = year
  ) |>
  arrange(year)

datatable(
  fraction_positive_by_year,
  options = list(searching = FALSE, info = FALSE, pageLength = 10)
)
Show code
# ===== 2) Fraction of positive revisions BY DECADE =====
# Same share-positive statistic, pooled across each decade.
fraction_positive_by_decade <- joined_data |>
  summarise(
    fraction_positive = round(mean(revision > 0, na.rm = TRUE), 2),
    .by = decade
  ) |>
  arrange(decade)

datatable(
  fraction_positive_by_decade,
  options = list(searching = FALSE, info = FALSE, pageLength = 10)
)
  3. How has the relative CES revision magnitude (absolute value of revision amount over final estimate) changed over time?
Show code
library(dplyr)
library(lubridate)
library(DT)
library(htmltools)
library(ggplot2)
library(zoo)

## ---- relative-revision-magnitude, message=FALSE, warning=FALSE ----
# Relative magnitude = |revision| / |final estimate|, summarised per
# year, with a 3-year centered rolling mean to smooth single-year spikes.
rel_tbl <- joined_data |>
  mutate(relative_revision = abs(revision) / abs(final)) |>
  summarise(
    avg_relative_revision = mean(relative_revision, na.rm = TRUE),
    sd_relative_revision = sd(relative_revision, na.rm = TRUE),
    n = n(),
    .by = year
  ) |>
  arrange(year) |>
  mutate(
    # percent for display
    avg_relative_revision_pct = 100 * avg_relative_revision,
    rolling_avg_pct = 100 * zoo::rollmean(avg_relative_revision, k = 3, fill = NA, align = "center")
  )

# Plot: raw annual average (teal) and 3-year smoothed trend (gold, dashed).
ggplot(rel_tbl, aes(x = year)) +
  geom_line(aes(y = avg_relative_revision_pct), color = "#00737a", linewidth = 1.0) +
  geom_line(aes(y = rolling_avg_pct), color = "#d4a017", linetype = "dashed", linewidth = 1.1) +
  scale_y_continuous(labels = \(v) paste0(v, "%")) +
  labs(
    title = "Trend in Average Relative CES Revision Magnitude Over Time",
    subtitle = "Teal = annual average, gold dashed = 3-year rolling average",
    x = "Year",
    y = "Relative Revision (% of Final Estimate)"
  ) +
  theme_minimal(base_size = 12)

  4. How has the absolute CES revision as a percentage of overall employment level changed over time?
Show code
## ---- abs-revision-percent, message=FALSE, warning=FALSE ----
# Absolute revision as a share of total employment, averaged per year,
# plus a 3-year centered rolling mean.
abs_pct_tbl <- joined_data |>
  mutate(abs_revision_pct_employment = 100 * abs(revision) / level) |>
  summarise(
    avg_abs_revision_pct = mean(abs_revision_pct_employment, na.rm = TRUE),
    sd_abs_revision_pct = sd(abs_revision_pct_employment, na.rm = TRUE),
    n = n(),
    .by = year
  ) |>
  arrange(year) |>
  mutate(
    rolling_avg = zoo::rollmean(avg_abs_revision_pct, k = 3, fill = NA, align = "center")
  )

# Plot: annual average (navy) and rolling average (orange, dashed).
ggplot(abs_pct_tbl, aes(x = year)) +
  geom_line(aes(y = avg_abs_revision_pct), color = "#002b5c", linewidth = 1.0) +
  geom_line(aes(y = rolling_avg), color = "#e67e22", linetype = "dashed", linewidth = 1.1) +
  scale_y_continuous(labels = \(v) paste0(v, "%")) +
  labs(
    title = "Average Absolute CES Revision as % of Total Employment",
    subtitle = "Orange dashed = 3-year centered rolling average",
    x = "Year",
    y = "Average Absolute Revision (% of Nonfarm Employment)"
  ) +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))

Key Insight: While the headline numbers look large (sometimes 100K+ jobs), this visualization shows they represent a tiny fraction of total employment. Even during the high-revision years of 2020-2024, adjustments averaged less than 0.05% of the workforce. This contextualizes why “economically trivial” is the right descriptor for BLS revisions.

  5. Are there any months that systematically have larger or smaller CES revisions?
Show code
# Add a calendar-month label, then average |revision| by month across
# all years to test for a seasonal pattern in revision size.
joined_data <- joined_data |>
  mutate(month = month(date, label = TRUE))

monthly_revision_stats <- joined_data |>
  summarise(
    avg_abs_revision = mean(abs(revision), na.rm = TRUE),
    .by = month
  )

# Bar chart: seasonality (or lack of it) in revision size.
ggplot(monthly_revision_stats, aes(x = month, y = avg_abs_revision)) +
  geom_col(fill = "#4C72B0") +
  labs(
    title = "Do CES Revisions Follow a Seasonal Pattern?",
    subtitle = "Average absolute revision by calendar month",
    y = "Average Absolute Revision",
    x = "Month"
  ) +
  theme_minimal(base_size = 13)

  6. How large is the average CES revision in absolute terms? In terms of percent of that month’s CES level?
Show code
library(dplyr)
library(ggplot2)
library(scales)

# Annual averages of revision size, expressed two ways:
#   raw -- thousands of jobs
#   pct -- percent of that month's overall employment level
plot_data <- joined_data |>
  mutate(year = year(date)) |>
  group_by(year) |>
  summarise(
    raw = mean(abs(revision), na.rm = TRUE) / 1000,
    pct = mean(abs(revision) / level * 100, na.rm = TRUE)
  )

# Single source of truth for the secondary-axis mapping: the percent
# series is drawn at pct * axis_scale on the primary axis, and the
# secondary axis divides by the same factor to label it correctly.
# (Previously the literal 30 was repeated in three places.)
axis_scale <- 30

ggplot(plot_data, aes(x = year)) +

  # soft glow ribbon behind bars (creates depth)
  geom_ribbon(
    aes(ymin = 0, ymax = raw),
    fill = "#FF6B6B", alpha = 0.25
  ) +

  # Big bold bars
  geom_col(
    aes(y = raw),
    fill = "#D00000", alpha = 0.85, width = 0.75
  ) +

  # smoothed percent-of-employment trend line, rescaled onto the raw axis
  geom_line(
    aes(y = pct * axis_scale),
    color = "#023E8A", linewidth = 1.8, lineend = "round"
  ) +
  geom_point(
    aes(y = pct * axis_scale),
    color = "#023E8A", size = 3
  ) +

  # y-axis formatting: secondary axis undoes the axis_scale rescaling
  scale_y_continuous(
    name = "Raw Revision (thousands of jobs)",
    sec.axis = sec_axis(
      ~ . / axis_scale,
      name = "% of Employment",
      labels = percent_format(scale = 1, accuracy = 0.01)
    )
  ) +

  labs(
    title = "CES Revisions: How Big They Look vs. How Small They Really Are",
    subtitle = "Glow = perceived scale | Red bars = raw revisions | Blue line = % of employment",
    x = "Year"
  ) +

  theme_minimal(base_size = 15) +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title.y = element_text(color = "#D00000", face = "bold"),
    axis.title.y.right = element_text(color = "#023E8A", face = "bold")
  )

Interpretation: If certain months consistently had larger revisions, it would suggest either seasonal measurement challenges or systematic data quality issues in specific quarters. Instead, we see relatively uniform revision sizes across months, indicating revisions are driven by unexpected economic developments (recessions, policy changes, labor market shifts) rather than predictable seasonal patterns.

7. How Big Are CES Revisions Really? Looking at Both the Raw Numbers and the Percent of Employment

Show code
# Calculate average CES revision in two ways:
#   1. raw size (in jobs)
#   2. size relative to that month's overall employment level
summary_stats <- joined_data |>
  # match sample period and keep only valid months
  filter(year(date) >= 1979, !is.na(level), level > 0, !is.na(revision)) |>
  summarise(
    avg_abs_revision     = round(mean(abs(revision), na.rm = TRUE), 0),               # jobs
    avg_revision_pct_ces = round(mean(abs(revision) / level * 100, na.rm = TRUE), 3), # % of CES level
    n_months             = n()
  )

# Display as an interactive DataTable instead of a tibble
DT::datatable(
  summary_stats,
  caption = "Average CES Revision Since 1979 (Raw Jobs and Percent of Employment)",
  options = list(pageLength = 5, dom = "t")
)

Task 4: Testing Whether CES Revisions Differ From Zero

One-sample t-test: Is the mean revision different from 0?

Show code
# Base R one-sample t-test: H0 is that the mean revision equals zero.
# NAs in `revision` are dropped by t.test's default na.action.
t_test_raw <- t.test(joined_data$revision, mu = 0)

# Flatten the htest object into a one-row data frame for DT.
ci <- t_test_raw$conf.int
t_test_results <- data.frame(
  statistic = round(t_test_raw$statistic, 3),
  df        = t_test_raw$parameter,
  p_value   = round(t_test_raw$p.value, 5),
  estimate  = round(t_test_raw$estimate, 2),
  lower_ci  = round(ci[1], 2),
  upper_ci  = round(ci[2], 2)
)

DT::datatable(
  t_test_results,
  caption = "One-Sample t-Test: Is the Average CES Revision Equal to Zero?",
  options = list(dom = "t")
)

Compare the share of negative revisions before vs after 2000

Show code
# Share of negative revisions, split into pre/post-2000 eras.
# NOTE(review): post_2000 uses a strict ">", so the year 2000 itself
# falls in the "before" group -- confirm that is the intended split.
rev_direction <- joined_data |>
  filter(!is.na(revision)) |>
  mutate(
    post_2000 = year(date) > 2000,
    neg_rev   = revision < 0
  ) |>
  summarise(
    n        = n(),
    n_neg    = sum(neg_rev),
    prop_neg = round(mean(neg_rev) * 100, 1),
    .by      = post_2000
  ) |>
  arrange(post_2000)

DT::datatable(
  rev_direction,
  caption = "Direction of CES Revisions Before vs. After 2000",
  options = list(dom = "t")
)

TASK 5: Fact-Checking Political Claims About BLS Revisions

CLAIM 1: Trump’s Assertion About “Rigged” Numbers

Show code
library(dplyr)
library(ggplot2)
library(lubridate)
library(scales)
library(infer)
library(DT)

Evidence Analysis

3 key Statistics

Show code
# Calculate key statistics for Claim 1.
# FIX: summarise() evaluates its expressions sequentially, so the
# original computed `revision = round(revision / 1000, 1)` first and
# then `pct_of_level` from the ALREADY-rescaled revision, making the
# percentage 1000x too small. Compute the percentage before rescaling.
stat1_largest_rev <- joined_data |>
  slice_max(order_by = abs(revision), n = 1) |>
  summarise(
    date = format(date, "%B %Y"),
    pct_of_level = round(abs(revision) / level * 100, 3),
    revision = round(revision / 1000, 1)
  )

# Average revision magnitude during 2025 (the year of the dismissal).
stat2_avg_rev_trump <- joined_data |>
  filter(year(date) == 2025) |>
  summarise(
    avg_revision = round(mean(abs(revision), na.rm = TRUE) / 1000, 1),
    avg_pct = round(mean(abs(revision) / level * 100, na.rm = TRUE), 3)
  )

# Direction split of revisions from 2020 onward.
stat3_fraction_negative <- joined_data |>
  filter(year(date) >= 2020) |>
  summarise(
    pct_negative = round(mean(revision < 0, na.rm = TRUE) * 100, 1),
    pct_positive = round(mean(revision > 0, na.rm = TRUE) * 100, 1)
  )

# Display statistics
tribble(
  ~Statistic, ~Value,
  "Largest single revision (all time)", paste0(stat1_largest_rev$revision, "K jobs in ", stat1_largest_rev$date),
  "Average revision magnitude (2025)", paste0(stat2_avg_rev_trump$avg_revision, "K jobs (", stat2_avg_rev_trump$avg_pct, "% of employment)"),
  "Revisions that are negative (2020-2025)", paste0(stat3_fraction_negative$pct_negative, "%"),
  "Revisions that are positive (2020-2025)", paste0(stat3_fraction_negative$pct_positive, "%")
) |>
  datatable(
    caption = "Key Statistics on BLS Revisions",
    options = list(dom = "t", pageLength = 10),
    rownames = FALSE
  )

Hypothesis Test: Are revisions systematically biased?

Show code
# infer's one-sample t-test: H0 is that the average revision is zero.
t_test_bias <- joined_data |>
  filter(!is.na(revision)) |>
  t_test(response = revision, mu = 0, alternative = "two.sided") |>
  mutate(across(where(is.numeric), \(v) round(v, 4)))

t_test_bias |>
  datatable(
    caption = "T-Test: Is the Average Revision Systematically Biased Away from Zero?",
    options = list(dom = "t")
  )

Supporting Visualizations

Visualization 1: Historical Pattern of Revisions

Show code
# Revisions since 2000 around a zero baseline, with a loess trend and
# the 2024 election date marked.
# NOTE(review): the annotation height uses the max revision over the
# FULL sample, not just the plotted >= 2000 window -- presumably
# intentional headroom; confirm.
joined_data |>
  filter(year(date) >= 2000) |>
  ggplot(aes(x = date, y = revision)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray40", linewidth = 1) +
  geom_line(color = "#2c3e50", alpha = 0.6, linewidth = 0.8) +
  geom_smooth(method = "loess", se = TRUE, color = "#e74c3c", linewidth = 1.3, alpha = 0.2) +
  geom_vline(xintercept = as.Date("2024-11-05"), linetype = "dotted", color = "blue", linewidth = 1) +
  annotate("text", x = as.Date("2024-11-05"), y = max(joined_data$revision, na.rm = TRUE) * 0.9,
           label = "2024 Election", hjust = 1.1, color = "blue", size = 4) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "BLS Revisions Show No Pattern of Political Manipulation",
    subtitle = "Revisions fluctuate naturally above and below zero across all administrations",
    x = "Date",
    y = "Revision (jobs)",
    caption = "Data: Bureau of Labor Statistics | Negative revisions don't cluster around Republican administrations"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12, color = "gray30")
  )

Visualization 2: Distribution of Revisions (Symmetric Pattern)

Show code
# Histogram of revisions with density overlay, showing the distribution
# is roughly symmetric around zero (no systematic bias).
# FIX: the mean is computed once and reused for the reference line and
# its label -- the original recomputed it three separate times.
mean_rev <- mean(joined_data$revision, na.rm = TRUE)

joined_data |>
  filter(!is.na(revision)) |>
  ggplot(aes(x = revision)) +
  geom_histogram(aes(y = after_stat(density)), bins = 40, fill = "#3498db", alpha = 0.7, color = "white") +
  geom_density(color = "#e74c3c", linewidth = 1.5) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray20", linewidth = 1.2) +
  geom_vline(xintercept = mean_rev,
             linetype = "solid", color = "#2ecc71", linewidth = 1.2) +
  annotate("text", x = mean_rev,
           y = 0.0025, label = paste0("Mean = ", round(mean_rev, 1)),
           hjust = -0.1, color = "#2ecc71", size = 4.5, fontface = "bold") +
  scale_x_continuous(labels = comma) +
  labs(
    title = "BLS Revisions Follow a Normal Distribution Centered Near Zero",
    subtitle = "This symmetric pattern is inconsistent with systematic bias or 'rigging'",
    x = "Revision (jobs)",
    y = "Density",
    caption = "Data: BLS (1979-2025) | A rigged system would show skewed distributions"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 16)
  )

This claim is false:

President Trump’s claim that BLS revisions occurred “November 15, 2024, right after the Election” as evidence of manipulation is factually incorrect, as the 818,000-job revision he referenced was actually announced on August 21, 2024—three months before the election, not after—undermining his timeline-based conspiracy theory. Statistical analysis of BLS revisions from 1979 to 2025 reveals that revisions are a routine part of standard statistical practice, averaging only 3.2 thousand jobs (0.002% of total employment) with no significant increase in recent years, and the distribution of revisions shows no systematic directional bias, with roughly equal numbers of positive and negative adjustments and a mean revision statistically indistinguishable from zero. Furthermore, the claim that BLS inflated pre-election numbers to help Democrats is contradicted by the final jobs report before the 2024 election (November 1, 2024), which showed just 12,000 jobs added—the weakest monthly gain in nearly four years—data that Trump himself cited to attack Vice President Harris’s economic record. The empirical evidence demonstrates that BLS revisions follow consistent historical patterns with no unusual magnitude, direction, or timing that would support claims of political manipulation, and Dr. McEntarfer’s dismissal appears unwarranted based on the statistical record of the agency’s performance.

CLAIM 2: “She had the biggest miscalculations in over 50 years”

Evidence Analysis

3 key Statistics

Show code
# Historical comparison of revision magnitudes (values in thousands).
# FIX: these constants were defined but never used -- the tribble
# re-hardcoded "818,000" and "902,000" as literal strings, which could
# silently drift from the constants. Wire the constants through.
stat1_2024_revision <- 818  # The August 2024 annual revision
stat2_2009_revision <- 902  # The 2009 revision (larger!)

# Average monthly revision since 2020, raw and as % of employment.
stat3_recent_avg <- joined_data |>
  filter(year(date) >= 2020) |>
  summarise(
    avg_abs_rev = round(mean(abs(revision), na.rm = TRUE)),
    as_pct_employment = round(mean(abs(revision) / level * 100, na.rm = TRUE), 3)
  )

tribble(
  ~Statistic, ~Value,
  "2024 Annual Revision (mentioned by Trump)", paste0(format(stat1_2024_revision * 1000, big.mark = ","), " jobs"),
  "2009 Annual Revision (during Financial Crisis)", paste0(format(stat2_2009_revision * 1000, big.mark = ","), " jobs (LARGER!)"),
  "Years between 2009 and 2024", "15 years (not '50 years')",
  "Average absolute monthly revision (2020-2025)", paste0(format(stat3_recent_avg$avg_abs_rev, big.mark=","), " jobs (", stat3_recent_avg$as_pct_employment, "% of employment)")
) |>
  datatable(
    caption = "Historical Context: Was This Really the 'Biggest in 50 Years'?",
    options = list(dom = "t"),
    rownames = FALSE
  )

Hypothesis Test: Have revisions gotten larger over time?

Show code
# Compare revision magnitude (as % of employment) pre-2020 vs 2020+.
test_data_larger <- joined_data |>
  filter(!is.na(revision), !is.na(level), level > 0) |>
  mutate(
    post_2020 = year(date) >= 2020,
    # scale by employment so eras with different workforce sizes compare
    abs_rev_pct = abs(revision) / level * 100
  )

# Two-sided t-test on the scaled revision size between the two eras.
t_test_larger <- test_data_larger |>
  t_test(abs_rev_pct ~ post_2020,
         order = c("TRUE", "FALSE"),
         alternative = "two.sided") |>
  mutate(across(where(is.numeric), \(v) round(v, 5)))

t_test_larger |>
  datatable(
    caption = "T-Test: Are Post-2020 Revisions Larger Than Historical Average (as % of Employment)?",
    options = list(dom = "t")
  )

Interpretation: The t-test shows p-value = 0.11161. While post-2020 revisions may be slightly larger in absolute terms due to a larger workforce, when scaled by employment level they are NOT statistically different from historical patterns (p > 0.05)

2 supporting visualizations

Visualization 1: Revision Magnitude Over Time (Scaled)

Show code
# Annual average |revision| as a % of employment, with loess trend and
# a marker at 2020.
joined_data |>
  filter(!is.na(revision), !is.na(level), level > 0) |>
  mutate(
    abs_rev_pct = abs(revision) / level * 100,
    year = year(date)
  ) |>
  summarise(avg_abs_rev_pct = mean(abs_rev_pct, na.rm = TRUE), .by = year) |>
  ggplot(aes(x = year, y = avg_abs_rev_pct)) +
  geom_line(color = "#34495e", linewidth = 1.2) +
  geom_smooth(method = "loess", se = TRUE, color = "#e67e22", linewidth = 1.3, alpha = 0.2) +
  geom_vline(xintercept = 2020, linetype = "dashed", color = "red", linewidth = 1) +
  annotate("text", x = 2020, y = 0.04,
         label = "2020", hjust = 1.1, color = "red", size = 4) +
  scale_y_continuous(labels = percent_format(scale = 1, accuracy = 0.001)) +
  labs(
    title = "Revision Magnitude Hasn't Changed Dramatically Since 2020",
    subtitle = "When scaled by employment size, recent revisions are NOT unprecedented",
    x = "Year",
    y = "Average Absolute Revision (% of Employment)",
    caption = "Data: BLS | Recent revisions look large in absolute numbers but normal as % of workforce"
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold", size = 16))

Visualization 2: Largest Revisions in History

Show code
# The 20 largest revisions by absolute size, colored by era.
joined_data |>
  filter(!is.na(revision)) |>
  slice_max(order_by = abs(revision), n = 20) |>
  mutate(
    date_label = format(date, "%b %Y"),
    rev_thousands = revision / 1000,
    color_group = if_else(year(date) >= 2020, "2020+", "Pre-2020")
  ) |>
  ggplot(aes(x = reorder(date_label, abs(revision)), y = rev_thousands, fill = color_group)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c("2020+" = "#e74c3c", "Pre-2020" = "#3498db")) +
  labs(
    title = "The 20 Largest BLS Revisions in History",
    subtitle = "Recent revisions are large, but not unprecedented",
    x = NULL,
    y = "Revision (thousands of jobs)",
    fill = "Time Period",
    caption = "Data: BLS (1979-2025)"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    legend.position = "top"
  )

Verdict: Mostly False

Why is this claim misleading? President Trump’s claim that the 2024 BLS revision of 818,000 jobs was the “biggest in over 50 years” is factually incorrect, as the 2009 revision was larger at 902,000 jobs—only 15 years ago, not 50—and when scaled as a percentage of the growing U.S. workforce (from 90 million in 1979 to 160 million in 2025), recent revisions are not unusual or unprecedented. Large revisions typically occur during economic transitions such as the 2009 financial crisis, 2020 pandemic, and 2025 slowdown, reflecting the inherent difficulty of measuring a dynamic economy in real-time rather than indicating incompetence or political manipulation. Furthermore, the BLS Commissioner does not personally calculate or adjust employment figures—these are produced by career statisticians using established methodologies—meaning Dr. McEntarfer had no ability to “rig” the data, making her dismissal unjustified based on the statistical evidence

Overall Conclusions: Both claims are misunderstandings of how the data works and distortions of the timeline:

1. Revisions are normal, not nefarious: They happen because initial estimates are based on incomplete data.

2. No evidence of bias: Revisions are roughly symmetric and follow no political pattern

EXTRA CREDIT: Computational Statistical Inference

What is Computational Inference?

Instead of relying on mathematical formulas that assume your data follows a specific distribution (like a normal curve), computational inference runs thousands of simulations to let the data answer questions directly.

The Core Idea: Traditional statistics says “assume the data is normally distributed and use this formula.” Computational statistics says “shuffle your data 10,000 times and see if your real result stands out from the shuffled versions.” If your observed result is more extreme than 95% of the shuffled results, you’ve found something real—not just random chance.

Why It Matters for BLS Data: Revisions are often skewed, have outliers, and don’t follow neat bell curves. Computational methods don’t care. They work with messy, real-world data exactly as it is, making them perfect for testing whether BLS patterns are statistically significant or just noise.

How It Works: The Permutation Test Process

Show code
library(ggplot2)
library(dplyr)
library(grid)

# Hand-drawn "circular process flow" diagram built entirely from ggplot
# geoms: four colored step circles around a central "REPEAT 10,000 TIMES"
# circle, connected by curved dashed arrows, plus a pill-shaped fifth
# step above. All coordinates, curvatures, and sizes are hand-tuned for
# coord_fixed() with the axis limits set at the bottom -- do not adjust
# one without re-checking the others visually.

# --- Step positions ---
# x/y place each step circle around the center; fill gives each step
# its own color.
steps <- tibble(
  step = c("1", "2", "3", "4"),
  x = c(0, 1.7, 0, -1.7),
  y = c(1.2, 0, -1.4, 0),
  label = c(
    "1. State\nHypothesis",
    "2. Calculate\nObserved",
    "3. Shuffle &\nRecalculate",
    "4. Collect in\nDistribution"
  ),
  fill = c("#10b981", "#f59e0b", "#ef4444", "#8b5cf6")
)

center <- data.frame(x = 0, y = 0)

ggplot() +

  # ---- CURVED DASHED ARROWS (draw FIRST) ----
  # Drawn before the circles so the arrowheads sit underneath them.

  # 1 -> Center
  geom_curve(
    aes(x = 0, y = 1.0, xend = 0.15, yend = 0.45),
    curvature = 0.35,
    arrow = arrow(type = "closed", length = unit(0.4, "cm")),
    linetype = "dashed",
    linewidth = 1.6,
    color = "#1e40af"
  ) +

  # 2 -> 3
  geom_curve(
    aes(x = 1.3, y = -0.1, xend = 0.2, yend = -1.0),
    curvature = 0.4,
    arrow = arrow(type = "closed", length = unit(0.4, "cm")),
    linetype = "dashed",
    linewidth = 1.6,
    color = "#1e40af"
  ) +

  # 3 -> 4
  geom_curve(
    aes(x = 0, y = -1.05, xend = -1.2, yend = -0.1),
    curvature = 0.45,
    arrow = arrow(type = "closed", length = unit(0.4, "cm")),
    linetype = "dashed",
    linewidth = 1.6,
    color = "#1e40af"
  ) +

  # 4 -> Center (negative curvature bends the opposite way)
  geom_curve(
    aes(x = -1.2, y = 0.1, xend = -0.2, yend = 0.4),
    curvature = -0.4,
    arrow = arrow(type = "closed", length = unit(0.4, "cm")),
    linetype = "dashed",
    linewidth = 1.6,
    color = "#1e40af"
  ) +

  # ---- CENTER REPEAT CIRCLE ----
  # A single huge point (shape 21 = filled circle with border).
  geom_point(
    data = center,
    aes(x = x, y = y),
    size = 95, shape = 21,
    fill = "#2563eb",
    color = "#1e40af",
    stroke = 4
  ) +
  # Two annotations stack the "REPEAT" / "10,000 TIMES" text vertically.
  annotate(
    "text", x = 0, y = 0.1,
    label = "REPEAT",
    color = "white",
    fontface = "bold",
    size = 8
  ) +
  annotate(
    "text", x = 0, y = -0.15,
    label = "10,000\nTIMES",
    color = "white",
    fontface = "bold",
    size = 7
  ) +

  # ---- OUTER STEP CIRCLES ----
  # fill is passed as a vector (not an aesthetic) so no legend appears.
  geom_point(
    data = steps,
    aes(x = x, y = y),
    size = 48, shape = 21,
    fill = steps$fill,
    color = "white",
    stroke = 3
  ) +

  geom_text(
    data = steps,
    aes(x = x, y = y, label = label),
    color = "white",
    fontface = "bold",
    size = 5.4,
    hjust = 0.5,
    vjust = 0.5
  ) +

  # ---- STEP 5 (PILL SHAPE) ----
  # geom_label with label.size = 0 gives a borderless rounded pill.
  geom_label(
    aes(x = 0, y = 1.85, label = "5. Compare & Conclude"),
    fill = "#06b6d4",
    color = "white",
    fontface = "bold",
    size = 5,
    label.size = 0,
    label.padding = unit(0.45, "lines")
  ) +

  # ---- THEME ----
  # coord_fixed keeps the circles round; theme_void hides all axes.
  coord_fixed() +
  xlim(-2.6, 2.6) +
  ylim(-2.2, 2.3) +
  theme_void() +
  labs(
    title = "Design 1: Circular Process Flow",
    subtitle = "Permutation test logic with repeated resampling",
    caption = "Steps 2–4 repeat many times to create a null distribution without parametric assumptions."
  ) +
  theme(
    plot.title = element_text(size = 22, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 14, hjust = 0.5),
    plot.caption = element_text(size = 11, hjust = 0.5)
  )

How to Read This: Start at the top and follow the dashed arrows clockwise. The blue center shows the critical loop: we shuffle labels and recalculate the statistic 10,000 times, collecting results each time. At the end, we compare our real observed statistic to this distribution of 10,000 shuffled results. If our real result falls in the extreme 5% (the tails), we reject the null hypothesis—it’s too extreme to be random chance.

Let's apply this to the BLS revisions. Test 1: Mean Revision (Bootstrap Confidence Interval) — Is the average revision biased away from zero?

Show code
library(infer)
library(DT)

# Bootstrap resampling is random: fix the seed so the reported CI is
# reproducible across renders of this document.
set.seed(2025)

# Bootstrap test: 95% CI for the mean revision. Resample the observed
# revisions with replacement 10,000 times, compute each resample's
# mean, and take the central 95% of those means.
ci_mean <- joined_data |>
  filter(!is.na(revision)) |>
  specify(response = revision) |>
  generate(reps = 10000, type = "bootstrap") |>
  calculate(stat = "mean") |>
  get_confidence_interval(level = 0.95)

# Round the endpoints and attach a plain-language reading rule
ci_mean_display <- ci_mean |>
  mutate(
    lower_ci = round(lower_ci, 1),
    upper_ci = round(upper_ci, 1),
    interpretation = "If CI contains 0, no bias; if it doesn't, mean is biased"
  ) |>
  select(lower_ci, upper_ci, interpretation)

datatable(
  ci_mean_display,
  caption = "Test 1: 95% Bootstrap Confidence Interval for Mean Revision",
  options = list(dom = "t", pageLength = 5),
  rownames = FALSE
)
Show code
# Visualize the bootstrap distribution. Seed the generator so the
# histogram is reproducible: without a seed each render draws a fresh
# set of 10,000 resamples, so the plotted distribution would not match
# the resamples that produced the green CI lines (ci_mean).
set.seed(2025)

boot_dist_mean <- joined_data |>
  filter(!is.na(revision)) |>
  specify(response = revision) |>
  generate(reps = 10000, type = "bootstrap") |>
  calculate(stat = "mean")

ggplot(boot_dist_mean, aes(x = stat)) +
  geom_histogram(bins = 40, fill = "#3b82f6", alpha = 0.7, color = "white") +
  # Red dashed line: the "no bias" reference at zero
  geom_vline(xintercept = 0, linetype = "dashed", color = "#ef4444", linewidth = 1.2) +
  # Green solid lines: the 95% CI endpoints computed earlier
  geom_vline(xintercept = ci_mean$lower_ci, linetype = "solid", color = "#10b981", linewidth = 1.2) +
  geom_vline(xintercept = ci_mean$upper_ci, linetype = "solid", color = "#10b981", linewidth = 1.2) +
  annotate("text", x = 0, y = Inf, label = "Zero (no bias)", 
           vjust = 1.5, hjust = -0.1, color = "#ef4444", fontface = "bold") +
  labs(
    title = "Bootstrap Distribution: Mean Revision",
    subtitle = "10,000 resamples with replacement | Green lines = 95% CI",
    x = "Mean Revision (jobs)",
    y = "Frequency"
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold", size = 15))

Interpretation: The 95% confidence interval tells us: if we repeated our bootstrap procedure many times, 95% of our confidence intervals would contain the true population parameter. If this interval includes zero, there’s no evidence of systematic bias.

Test 2: Median Revision (Permutation Test) Did the median revision magnitude change after 2020?

Show code
# Prepare data for the permutation test: compare absolute revision
# magnitudes before vs. after 2020.
test_data_median <- joined_data |>
  filter(!is.na(revision)) |>
  mutate(
    post_2020 = year(date) >= 2020,
    abs_rev = abs(revision)
  )

# Observed difference in medians (post-2020 minus pre-2020).
# group_by() on a logical sorts FALSE before TRUE, so diff(med) is
# med[TRUE] - med[FALSE], matching order = c("TRUE", "FALSE") below.
obs_med_diff <- test_data_median |>
  group_by(post_2020) |>
  summarise(med = median(abs_rev), .groups = "drop") |>
  summarise(diff = diff(med)) |>
  pull(diff)

# Label shuffling is random: fix the seed so the p-value reproduces.
set.seed(2025)

# Null distribution: shuffle the pre/post labels and recompute the
# median difference 10,000 times.
perm_median <- test_data_median |>
  specify(abs_rev ~ post_2020) |>
  hypothesize(null = "independence") |>
  generate(reps = 10000, type = "permute") |>
  calculate(stat = "diff in medians", order = c("TRUE", "FALSE"))

# Two-sided p-value: share of shuffled differences at least as extreme
# as the observed one.
p_val_median <- perm_median |> 
  get_p_value(obs_stat = obs_med_diff, direction = "two-sided")

# Results table
results_median <- data.frame(
  observed_difference = round(obs_med_diff, 1),
  p_value = round(p_val_median$p_value, 4),
  significant = ifelse(p_val_median$p_value < 0.05, "Yes (p < 0.05)", "No (p ≥ 0.05)")
)

datatable(
  results_median,
  caption = "Test 2: Permutation Test - Did Median Revision Change Post-2020?",
  options = list(dom = "t"),
  rownames = FALSE
)
Show code
# Plot the null (permutation) distribution, shading the two-sided
# rejection region around the observed median difference.
visualize(perm_median) +
  shade_p_value(obs_stat = obs_med_diff, direction = "two-sided") +
  labs(
    title = "Permutation Distribution: Difference in Medians (Post-2020 vs Pre-2020)",
    subtitle = "10,000 shuffled permutations | Red shaded area = extreme 5%",
    x = "Difference in Median Absolute Revision (jobs)",
    y = "Frequency"
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold", size = 15))

Interpretation: Under the null hypothesis (no real difference), we shuffle which observations belong to “pre-2020” vs “post-2020” and recalculate the median difference 10,000 times. If our real observed difference falls in the shaded tails (extreme 5%), we reject the null and conclude there IS a real change.

Test 3: Proportion of Negative Revisions (Permutation Test) Are negative revisions more common after 2020?

Show code
# Prepare data: flag each month as post-2020 and as a downward
# (negative) revision.
test_data_prop <- joined_data |>
  filter(!is.na(revision)) |>
  mutate(
    post_2020 = year(date) >= 2020,
    is_negative = revision < 0
  )

# Observed difference in proportions (post-2020 minus pre-2020).
# group_by() on a logical sorts FALSE before TRUE, so diff(p) is
# p[TRUE] - p[FALSE], matching order = c("TRUE", "FALSE") below.
obs_prop_diff <- test_data_prop |>
  group_by(post_2020) |>
  summarise(p = mean(is_negative), .groups = "drop") |>
  summarise(diff = diff(p)) |>
  pull(diff)

# Label shuffling is random: fix the seed so the p-value reproduces.
set.seed(2025)

# Null distribution: shuffle the pre/post labels and recompute the
# proportion difference 10,000 times.
perm_prop <- test_data_prop |>
  specify(is_negative ~ post_2020, success = "TRUE") |>
  hypothesize(null = "independence") |>
  generate(reps = 10000, type = "permute") |>
  calculate(stat = "diff in props", order = c("TRUE", "FALSE"))

# Two-sided p-value: share of shuffled differences at least as extreme
# as the observed one.
p_val_prop <- perm_prop |> 
  get_p_value(obs_stat = obs_prop_diff, direction = "two-sided")

# Results table (difference shown as a percentage)
results_prop <- data.frame(
  observed_difference = paste0(round(obs_prop_diff * 100, 1), "%"),
  p_value = round(p_val_prop$p_value, 4),
  significant = ifelse(p_val_prop$p_value < 0.05, "Yes (p < 0.05)", "No (p ≥ 0.05)")
)

datatable(
  results_prop,
  caption = "Test 3: Permutation Test - More Negative Revisions Post-2020?",
  options = list(dom = "t"),
  rownames = FALSE
)
Show code
# Plot the null (permutation) distribution, shading the two-sided
# rejection region around the observed proportion difference.
visualize(perm_prop) +
  shade_p_value(obs_stat = obs_prop_diff, direction = "two-sided") +
  labs(
    title = "Permutation Distribution: Difference in Proportions Negative",
    subtitle = "10,000 shuffled permutations | Red shaded area = extreme 5%",
    x = "Difference in Proportion of Negative Revisions",
    y = "Frequency"
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold", size = 15))

Interpretation: This tests whether the fraction of downward revisions changed after 2020. We shuffle which months are labeled “post-2020” vs “pre-2020” and recalculate the proportion difference 10,000 times. If our real difference is extreme, we’ve found evidence of a real change in revision direction patterns.

Summary of Extra Credit:

This section demonstrates all four extra credit requirements:

Non-technical explanation: Computational inference uses simulations instead of formulas

Schematic visualization: The circular permutation test flowchart shows the actual process

Applied to BLS data: all three tests use real revision data. Three different tests: a bootstrap CI (for means), a permutation test (for medians), and a permutation test (for proportions).